In this brief example, I show how to use the GPS coordinates from the Demographic and Health Survey data and merge them to the ADM2 subnational geographic level for the country of Ethopia. Then I produce estimates using the DHS data for ADM 2 regions of the country.
This is possible by useing the GIS capacity of the sf
package to spatially intersect the DHS points and the ADM 2
polygons.
library(sf)
## Warning: package 'sf' was built under R version 4.1.3
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(mapview)
ethpoly <- st_read(dsn = "C:/Users/ozd504//OneDrive - University of Texas at San Antonio//students/fikre/spatial_epi/ETH_adm2.shp")
## Reading layer `ETH_adm2' from data source
## `C:\Users\ozd504\OneDrive - University of Texas at San Antonio\students\fikre\spatial_epi\ETH_adm2.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 72 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 32.99994 ymin: 3.322099 xmax: 47.98618 ymax: 14.89996
## Geodetic CRS: WGS 84
ethpoly$struct <- 1:dim(ethpoly)[1]
plot(ethpoly["struct"])
The adm2 shapefile can be found in the Diva GIS international data repository, or from the IPUMS International site below I use the ADM2 level of administrative geography.
These locations are not identified in the DHS, but by performing a spatial intersection, we can merge the DHS survey locations to the ADM 2 units
eth_dots<-st_read("C:/Users/ozd504//OneDrive - University of Texas at San Antonio//students//fikre/ethiopia_dhs/ETGE52FL/ETGE52FL.shp")
## Reading layer `ETGE52FL' from data source
## `C:\Users\ozd504\OneDrive - University of Texas at San Antonio\students\fikre\ethiopia_dhs\ETGE52FL\ETGE52FL.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 535 features and 20 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 0 ymin: 0 xmax: 43.36409 ymax: 14.50379
## Geodetic CRS: WGS 84
eth_dots <- eth_dots[eth_dots$LATNUM>0,]
eth_adm2<-st_read("C:/Users/ozd504//OneDrive - University of Texas at San Antonio//students//fikre/spatial_epi/ETH_adm2.shp")
## Reading layer `ETH_adm2' from data source
## `C:\Users\ozd504\OneDrive - University of Texas at San Antonio\students\fikre\spatial_epi\ETH_adm2.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 72 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 32.99994 ymin: 3.322099 xmax: 47.98618 ymax: 14.89996
## Geodetic CRS: WGS 84
#merge dots to administrative data
eth_dots2000<-st_intersection(eth_dots, eth_adm2)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
mapview(eth_dots["DHSCLUST"])+mapview(eth_adm2["NAME_2"])
Next, I use the DHS survey data to estimate the prevalence of stunting in the ADM 2 regions.
library(haven)
dhs2000<-read_dta("C:/Users/ozd504//OneDrive - University of Texas at San Antonio//students//fikre/ethiopia_dhs/ETKR41DT/ETKR41FL.DTA")
dhs2000<-zap_labels(dhs2000)
library(car)
## Loading required package: carData
dhs2000$stunting<-ifelse(dhs2000$hw5/100<=-2&dhs2000$hw5/100!=-2,1,0)
#dhs2000$sex<-dhs2000$hc27
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dhs2000<-dhs2000%>%
mutate(wt = v005/1000000)%>%
filter(complete.cases(stunting))%>%
select(v001,stunting, wt, v021, v022)
dhs2000m<-merge(dhs2000, eth_dots2000, by.x="v001", by.y="DHSCLUST")
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.1.3
## Loading required package: survival
## Warning: package 'survival' was built under R version 4.1.3
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
options(survey.lonely.psu = "adjust")
des<-svydesign(ids = ~v021, strata = ~v022, weights = ~wt, data=dhs2000m)
names(dhs2000m)
## [1] "v001" "stunting" "wt" "v021" "v022"
## [6] "DHSID" "DHSCC" "DHSYEAR" "CCFIPS" "ADM1FIPS"
## [11] "ADM1FIPSNA" "ADM1SALBNA" "ADM1SALBCO" "ADM1DHS" "ADM1NAME"
## [16] "DHSREGCO" "DHSREGNA" "SOURCE" "URBAN_RURA" "LATNUM"
## [21] "LONGNUM" "ALT_GPS" "ALT_DEM" "DATUM" "ID_0"
## [26] "ISO" "NAME_0" "ID_1" "NAME_1" "ID_2"
## [31] "NAME_2" "TYPE_2" "ENGTYPE_2" "NL_NAME_2" "VARNAME_2"
## [36] "geometry"
est.stunt <- svyby(~stunting, ~ID_2, des, FUN=svymean, na.rm=T)
head(est.stunt)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(mapview)
mdat<- geo_join(ethpoly, est.stunt, "ID_2","ID_2")
## Warning: We recommend using the dplyr::*_join() family of functions instead.
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
mapview(mdat["stunting"])